Ex_treme's blog.

基于用户的协同过滤算法(UserCF)

2018/11/21 Share

基本公式

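(原文此处为公式图片,以下依据下文代码还原。)

用户相似度:

$$ s_{uv} = \frac{\sum_{i \in N(u) \cap N(v)} w_i(u, v)}{\sqrt{|N(u)| \cdot |N(v)|}} $$

其中 $N(u)$ 表示用户 $u$ 点击过的物品集合,$w_i(u, v)$ 为共同点击的物品 $i$ 对相似度的贡献分。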

实现代码

步骤1:导包

1
2
3
4
5
6
7
import math
import operator
import sys

import util.reader as reader

sys.path.append("../util")

步骤2:主流程函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15

def main_flow(
        ratings_path="/home/pzs741/PycharmProjects/CollaborativeFiltering/data/ratings.csv",
        movies_path="/home/pzs741/PycharmProjects/CollaborativeFiltering/data/movies.csv"):
    """
    Main flow of usercf.

    Loads the click data and item info through ``reader``, regroups the
    clicks per item with ``transfer_user_click``, computes the user
    similarity with ``cal_user_sim`` and hands it to ``debug_user_sim``.

    :param ratings_path: file passed to ``reader.get_user_click``; defaults to
        the location that used to be hard-coded here
    :param movies_path: file passed to ``reader.get_item_info``; defaults to
        the location that used to be hard-coded here
    :return: None
    """
    user_click, user_click_time = reader.get_user_click(ratings_path)
    # item_info is only consumed by the disabled recommendation step below.
    item_info = reader.get_item_info(movies_path)
    item_click_by_user = transfer_user_click(user_click)
    user_sim = cal_user_sim(item_click_by_user, user_click_time)
    debug_user_sim(user_sim)
    # recom_result = cal_recom_result(user_click,user_sim)
    # debug_recom_result(item_info,recom_result)

步骤3:计算用户相似度

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def cal_user_sim(item_click_by_user, user_click_time):
    """
    Compute the pairwise user similarity from item co-clicks.

    For every item, each pair of users that both clicked it receives a
    contribution score from ``update_two_contribution_score``, credited in
    both directions. The accumulated score of a pair is then divided by
    ``sqrt(click_count(user_i) * click_count(user_j))``.

    :param item_click_by_user: dict, key itemid, value [userid1, userid2]
    :param user_click_time: dict, key userid + "_" + itemid, value click time;
        a missing key is treated as click time 0
    :return: dict, key userid, value [(userid_j, simscore), ...] sorted by
        simscore in descending order
    """
    co_appear = {}
    user_click_count = {}
    for itemid, user_list in item_click_by_user.items():
        for index_i, user_i in enumerate(user_list):
            user_click_count[user_i] = user_click_count.get(user_i, 0) + 1
            click_time_one = user_click_time.get(user_i + "_" + itemid, 0)

            # Only look at users after index_i so each pair is visited once.
            for user_j in user_list[index_i + 1:]:
                click_time_two = user_click_time.get(user_j + "_" + itemid, 0)
                # co_appear[...] += base_contribution_score()
                # co_appear[...] += update_contribution_score(len(user_list))
                score = update_two_contribution_score(click_time_one, click_time_two)

                co_appear.setdefault(user_i, {}).setdefault(user_j, 0)
                co_appear[user_i][user_j] += score

                # Mirror the contribution so the relation is symmetric. This
                # used to be added to [user_i][user_j] a second time, which
                # doubled that entry and left [user_j][user_i] at 0.
                co_appear.setdefault(user_j, {}).setdefault(user_i, 0)
                co_appear[user_j][user_i] += score

    user_sim_info = {}
    for user_i, relate_user in co_appear.items():
        user_sim_info[user_i] = {
            user_j: cotime / math.sqrt(user_click_count[user_i] * user_click_count[user_j])
            for user_j, cotime in relate_user.items()
        }

    user_sim_info_sorted = {}
    for user, sim_info in user_sim_info.items():
        user_sim_info_sorted[user] = sorted(sim_info.items(), key=operator.itemgetter(1),
                                            reverse=True)

    return user_sim_info_sorted

步骤4:计算推荐结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
def cal_recom_result(user_click, user_sim):
    """
    Recommend items by the usercf algorithm.

    For each user, the first ``top_user`` entries of ``user_sim[user]`` are
    consulted, and the first ``item_num`` items clicked by each of those
    users are recommended with that user's similarity score. When several
    similar users contribute the same item, the score of the first one
    encountered is kept.

    :param user_click: dict, key userid, value [itemid1, itemid2]
    :param user_sim: dict, key userid, value [(userid_j, score1), (userid_k, score2)]
    :return: dict, key userid, value dict, value_key itemid, value_value recom_score;
        a user without any entry in user_sim gets an empty dict
    """
    recom_result = {}
    top_user = 3  # how many entries of user_sim[user] are consulted
    item_num = 5  # how many items are taken from each similar user
    for user in user_click:
        user_recom = recom_result.setdefault(user, {})
        # A user who shares no item with anyone has no similarity entry;
        # treat that as "no similar users" instead of raising KeyError.
        for userid_j, sim_score in user_sim.get(user, [])[:top_user]:
            if userid_j not in user_click:
                continue
            # NOTE(review): items the user has already clicked are not
            # filtered out here - confirm whether that is intended.
            for itemid_j in user_click[userid_j][:item_num]:
                user_recom.setdefault(itemid_j, sim_score)
    return recom_result
CATALOG
  1. 1. 基本公式
  2. 2. 实现代码